/************************************
        Extended Kalman Filter
        Matrix operations

        V. Smidl, Z. Peroutka

Rev. 28.10.2010   (ZP)

26.10.2010      Prvni verze (VS)

26.10.2010      Upravena chyba v Thorton_fast - spatne shiftovani o vypoctu SIGMA.
27.10.2010      Pokus o odstraneni problemu v Thorton_fast - potize dela omezovani (orezavani) varianci.
28.10.2010      Drobne upravy v kodu.

*************************************/

#include "matrix_vs.h"
#include "qmath.h"

#define HIGH_PRECISION	1

void householder(int16 *Ch, int16 *Q, unsigned int16 dimx) 
{
    int16 k,j,i;
    int16 alpha,beta;
    int32  sigma; // 2*qCh
    int32  tmp_long;
    int16 B[25];//Q in qCh
    int16 w[5];
    int16 v[5];

    int16 *B_ij, *Q_i, *B_kj, *Ch_kj, *Ch_ij, *w_j, *v_j;

    B_ij=B;
    Q_i=Q;
    // copy Q to B
    for (i=0;i<dimx*dimx;i++)
    {
       *B_ij++=(*Q_i++)>>(15-qCh);
    }

    for (k=dimx-1; k>=0; k--)
    {
        sigma=0;
        B_kj=B+k*dimx+k;
        Ch_kj=Ch+k*dimx;

        for (j=k;j<dimx ;j++)
        {
            sigma+=((long)*B_kj**B_kj);
            B_kj++;
        }

        for (j=0;j<=k;j++)
        {
            sigma+=((long)*Ch_kj**Ch_kj);
            Ch_kj++;
        }

        //alpha in qCh
//        alpha = (int)(sqrt((double)sigma)+0.5);   // verze pro PC
        alpha = qsqrt(sigma);                     // verze pro DSP

	sigma=0;

        w_j=w;
        B_kj=B+k*dimx;

        for (j=0;j<dimx;j++)
        {
            *w_j=*B_kj;
            sigma+=(long)*w_j**w_j;
            w_j++;
            B_kj++;
        }

        v_j=v;
        Ch_kj=Ch+k*dimx;

        for (j=0; j<=k;j++)
        {
            if (j==k) {
                *v_j=*Ch_kj-alpha;
            } else {
                *v_j=*Ch_kj;
            }
            sigma+=(long)*v_j**v_j;

            v_j++;
            Ch_kj++;
        }

        alpha=sigma>>(qCh+1); // alpha = sigma /2;
        if (alpha==0) alpha =1;

        for (i=0;i<=k;i++)
        {
            sigma=0;

            B_ij=B+i*dimx+i;
            w_j=w+i;

            for (j=i;j<dimx;j++)
            {
                sigma+=((long)*B_ij**w_j);

                B_ij++;
                w_j++;
            }

            Ch_ij = Ch + i*dimx;
            v_j=v;

            for (j=0;j<=k;j++)
            {
                sigma+=(long)*Ch_ij**v_j;

                Ch_ij++;
                v_j++;
            }

            sigma = sigma >> 15;               // navrat do Q15
            if (sigma>32767) sigma=32767;


            B_ij=B+i*dimx+i;
            w_j=w+i;

            for (j=i;j<dimx;j++)
            {
		tmp_long=((long)*B_ij*alpha-sigma**w_j)/alpha;
		if (tmp_long>32767)
		   tmp_long=32767;
		if (tmp_long<-32768)
                   tmp_long=-32768;
		*B_ij++=tmp_long;
		w_j++;
            };

	    Ch_ij=Ch+i*dimx;
            v_j=v;

            for (j=0;j<=k;j++)
            {
                tmp_long=((long)*Ch_ij*alpha-sigma**v_j)/alpha;
		if (tmp_long>32767)
		   tmp_long=32767;
		if (tmp_long<-32768)
		   tmp_long=-32768;
		*Ch_ij++=tmp_long;
		v_j++;
            }
        }
    }
}

void carlson(int16 *difz, int16 *xp, int16 *Ch, int16 *R, unsigned int16 dimy, unsigned int16 dimx ) {
    int16 alpha,beta,gamma;
    int16 delta, eta,epsilon,zeta,sigma,tau;
    int16 i,j,iy;
    int16 w[5];
	int32 tmp_long;

	int16 *Ch_ij, *w_i, *x_i, *Ch_iyj;


    for (iy=0; iy<dimy; iy++)
    {
        alpha=R[iy];
        delta = difz[iy];

		Ch_iyj=Ch+iy*dimx;

        for (j=0;j<dimx;j++)
        {
            sigma=*Ch_iyj++;
            beta=alpha;
//            alpha+=((long)sigma*sigma)>>15;
            alpha=(((long)alpha<<15)+(long)sigma*sigma)>>15;				// vyssi presnost
            gamma= qsqrt(((long)alpha*beta));            // predelat v DSP
            w[j]=0;

			Ch_ij=Ch+j;
			w_i=w;

            for (i=0;i<=j;i++) 
            {
				tau=*Ch_ij;
				tmp_long=((long)beta**Ch_ij -(long)sigma**w_i)/gamma;
			
/*				*Ch_ij=tmp_long;
				if (tmp_long>32767) 
					*Ch_ij=32767;
				if (tmp_long<-32768)
					*Ch_ij=-32768;	/**/
				if (tmp_long>32767) 
					tmp_long=32767;
				if (tmp_long<-32768)
					tmp_long=-32768;
				*Ch_ij=tmp_long;
			
//                w_i+=((long)tau*sigma)>>15;
                *w_i=(((long)*w_i<<15)+(long)tau*sigma)>>15;

				w_i++;
				Ch_ij+=dimx;
            }
        }

		x_i=xp;
		w_i=w;
        for (i=0;i<dimx;i++) {
//            xp[i]+=((long)w[i]*delta)/alpha;
//            *x_i+=((long)*w_i*delta)/alpha;
            *x_i=((long)*x_i*alpha+(long)*w_i*delta)/alpha;		// vyssi presnost
			x_i++;
			w_i++;
        }
    }
}

/* Matrix multiply Full matrix by upper diagonal matrix; */
void mmultACh(int16 *m1, int16 *up, int16 *result, unsigned int16 rows, unsigned int16 columns) {
	unsigned int16 i, j, k;
	int32 tmp_sum=0L;
	int16 *m2pom;
	int16 *m1pom=m1;
	int16 *respom=result;
	
	for (i=0; i<rows; i++) //rows of result
    {
		for (j=0; j<columns; j++) //columns of result
        {
			m2pom=up+j;
			
			for (k=0; k<=j; k++) //inner loop up to "j" - U(j,j)==1;
            {
				tmp_sum+=(int32)(*(m1pom++))**m2pom;
				m2pom+=columns;
			}
			m1pom-=(j+1); // shift back to first element

			*respom++=tmp_sum>>15;

			tmp_sum=0;
		}
		m1pom+=(columns);
	}
}

void givens(int16 *Ch, int16 *Q, unsigned int16 dimx)
{
	int16 i,j,k;
	int16 rho,s,c,tau;
	int32  tmp_long;

	int16 A[25];//beware

	int16 *A_ij, *Q_i, *Ch_ki, *Ch_kj, *Ch_ii, *Ch_ij, *A_kj;

	A_ij=A;
	Q_i=Q;
	// copy Q to A
	for (i=0;i<dimx*dimx;i++)
	{
	  *A_ij++=(*Q_i++)>>(15-qCh);
	}

	for (i=dimx-1; i>=0; i--)
	{
		Ch_ii=Ch+i*dimx+i;
		A_ij=A+i*dimx;

		for (j=0; j<dimx; j++)
		{
                    if (*A_ij!=0)
                    {
			tmp_long=(long)*Ch_ii**Ch_ii+(long)*A_ij**A_ij;

			rho=qsqrt(tmp_long);
			s=((long)*A_ij<<15)/rho;
			c=((long)*Ch_ii<<15)/rho;

			Ch_ki=Ch+i;
			A_kj=A+j;

			for (k=0;k<=i; k++)
			{
   			    tau=((long)c**A_kj-(long)s**Ch_ki)>>15;
			    *Ch_ki=((long)s**A_kj+(long)c**Ch_ki)>>15;
			    *A_kj=tau;

			    Ch_ki+=dimx;
			    A_kj+=dimx;
                         }

                    }
		    
                    A_ij++;
		}

	Ch_ij = Ch+i*dimx;

	for (j=0; j<i; j++)
	{
		if (*Ch_ij!=0)
		{
                   tmp_long=(long)*Ch_ii**Ch_ii+(long)*Ch_ij**Ch_ij;
		   rho=qsqrt(tmp_long);
		   s=((long)*Ch_ij<<15)/rho;
		   c=((long)*Ch_ii<<15)/rho;

		   Ch_kj = Ch + j;
		   Ch_ki = Ch + i;

		   for (k=0; k<=i; k++)
		   {
		       tau=((long)c**Ch_kj-(long)s**Ch_ki)>>15;
		       *Ch_ki =((long)s**Ch_kj+(long)c**Ch_ki)>>15;
		       *Ch_kj=tau;

		       	Ch_kj += dimx;
			Ch_ki += dimx;
                  }
		}

		Ch_ij++;
	}
  }
}
